import scanpy as sc, anndata as ad, numpy as np, pandas as pd
from scipy import sparse
from anndata import AnnData
import warnings
import socket
from matplotlib import pylab
import sys
import yaml
import os
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
import ipynbname
try:
nb_fname = ipynbname.name()
except:
nb_fname = "".join(os.path.basename(globals()["__vsc_ipynb_file__"]).split(".")[:-1])
%matplotlib inline
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=50, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (9, 9)
scanpy==1.8.1 anndata==0.7.6 umap==0.4.6 numpy==1.20.2 scipy==1.6.3 pandas==1.2.4 scikit-learn==0.24.2 statsmodels==0.13.1 python-igraph==0.9.8 louvain==0.7.1 pynndescent==0.5.5
hostRoot = "-".join(socket.gethostname().split('-')[0:2])
outdir="./outdir"
markerGenes = "./data/resources/marker_genes.tsv"
outdirFigures= "./figures"
adata = sc.read_h5ad(outdir+"/02C_Preprocess_final.h5ad")
adata
AnnData object with n_obs × n_vars = 18822 × 28371
obs: 'dataset', 'organoid', 'region', 'type', 'type_region', 'regionContrast', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'is.Stressed'
var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'mean', 'std'
uns: 'dataset_colors', 'diffmap_evals', 'draw_graph', 'is.Stressed_colors', 'neighbors', 'organoid_colors', 'pca', 'regionContrast_colors', 'region_colors', 'type_colors', 'umap'
obsm: 'X_diffmap', 'X_draw_graph_fa', 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'connectivities', 'distances'
adata.obs.columns
Index(['dataset', 'organoid', 'region', 'type', 'type_region',
'regionContrast', 'n_genes_by_counts', 'log1p_n_genes_by_counts',
'total_counts', 'log1p_total_counts', 'total_counts_mt',
'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo',
'log1p_total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'is.Stressed'],
dtype='object')
for group in adata.obs.organoid.unique():
sc.pl.umap(adata,color="organoid", groups=[group], size = 30, na_color="lightgrey", save=nb_fname+"."+group+".pdf", frameon=False)
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid1.pdf
for group in adata.obs.type.unique():
sc.pl.umap(adata,color="type", groups=[group], size = 30, na_color="lightgrey", save=nb_fname+"."+group+".pdf", frameon=False)
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid.pdf
for region in adata.obs.region.unique():
sc.pl.umap(adata,color="region", groups=[region], size = 30, na_color="lightgrey", save=nb_fname+"."+region+".pdf", frameon=False)
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.piece2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.piece1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.piece3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.medial.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.distal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.proximal.pdf
for dataset in adata.obs.dataset.unique():
sc.pl.umap(adata,color="dataset", groups=[dataset], size = 30, na_color="lightgrey", save=nb_fname+"."+dataset+".pdf", frameon=False)
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control3_piece2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control1_piece1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control2_piece2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control3_piece1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control1_piece3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control1_piece2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control2_piece3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control3_piece3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control2_piece1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid3_medial.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid2_medial.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid3_distal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid1_distal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid2_distal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid1_medial.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid1_proximal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid2_proximal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid3_proximal.pdf
for organoid in adata.obs.organoid.unique():
sc.pl.umap(adata,color="organoid", groups=[organoid], size = 30, na_color="lightgrey", save=nb_fname+"."+organoid+".pdf", frameon=False)
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid1.pdf
sc.tl.leiden(adata, resolution=.3, key_added="leiden0.3")
running Leiden clustering
finished: found 12 clusters and added
'leiden0.3', the cluster labels (adata.obs, categorical) (0:00:02)
sc.pl.umap(adata, color=["leiden0.3"], ncols = 2, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False, save=nb_fname+".complete.pdf")
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.complete.pdf
leiden3Mapping = {"0":"Encs-1",
"1":"CBC/BRCs-1",
"2":"EXN-1",
"3":"EnCs-2",
"4":"RGCs-1",
"5":"RGCs-2",
"6":"ccRGCs",
"7":"inPCs",
"8":"EXN-2",
"9":"RtCs",
"10":"ER-Cs",
"11":"CBC/BRCs-2"}
adata.obs["leidenAnno"] = adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
sc.pl.umap(adata, color=["leiden0.3","leidenAnno"], ncols = 2, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False, save=nb_fname+".complete.annpo.pdf")
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.complete.annpo.pdf
sc.tl.rank_genes_groups(adata, groupby="leidenAnno", method="wilcoxon")
ranking genes
finished: added to `.uns['rank_genes_groups']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:44)
pylab.rcParams['figure.figsize'] = (7, 7)
sc.pl.rank_genes_groups(adata)
pylab.rcParams['figure.figsize'] = (9, 9)
sc.pl.rank_genes_groups_dotplot(adata, n_genes=5, values_to_plot='logfoldchanges', min_logfoldchange=3, vmax=7, vmin=-7, cmap='bwr',standard_scale='var',save=nb_fname+".topMarkers.pdf")
WARNING: dendrogram data not found (using key=dendrogram_leidenAnno). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently.
using 'X_pca' with n_pcs = 50
Storing dendrogram info using `.uns['dendrogram_leidenAnno']`
WARNING: saving figure to file figures/dotplot_03_Dimreduct_exploration.topMarkers.pdf
#Save all genes for each leiden
allMarkers = {}
for i in adata.obs["leidenAnno"].unique():
allMarkers[i] = sc.get.rank_genes_groups_df(adata, group=i)
allMarkers[i]["leidenAnno"] = i
allMarkers[i] = allMarkers[i][(allMarkers[i].logfoldchanges > 3) & ( allMarkers[i].pvals_adj < 0.05)]
allMarkers = pd.concat(allMarkers, ignore_index=True)
allMarkers.to_excel(outdir+"/"+nb_fname+".SCwilcoxon.LeidenMarkers.xls")
#allMarkers.to_excel()
# matrixplot style
topRankedDict = { i: sc.get.rank_genes_groups_df(adata, group=i).head(5)["names"].tolist() for i in adata.obs["leidenAnno"].unique()}
sc.pl.matrixplot(adata, topRankedDict, "leidenAnno", dendrogram=True,swap_axes=False,
colorbar_title='mean z-score',cmap='RdBu_r', standard_scale="var", save=nb_fname+".topMarkers.matrixStyle.pdf")
WARNING: saving figure to file figures/matrixplot_03_Dimreduct_exploration.topMarkers.matrixStyle.pdf
sc.pl.pca_variance_ratio(adata)
sc.tl.dendrogram(adata,"leidenAnno", n_pcs=15)
sc.pl.correlation_matrix(adata, 'leidenAnno', figsize=(10,8),cmap="PRGn", show_correlation_numbers=False,save=nb_fname+".correlation.pdf")
using 'X_pca' with n_pcs = 15 Storing dendrogram info using `.uns['dendrogram_leidenAnno']` WARNING: saving figure to file figures/correlation_matrix03_Dimreduct_exploration.correlation.pdf
sc.pl.umap(adata, color=["leiden0.3","leidenAnno"], ncols = 2, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
adata.write_h5ad(outdir+"/3_polaroid_quickAnno.h5ad")